Bringing together marine biodiversity, environmental and maritime boundaries data in R
Salvador Fernandez, Laura Marquez, Lotte Pohl
May 30th 2022
How to access, query and obtain the data
Visualize and get them ready for further exploration and analyses using R
Get and standardize data
Get more occurrence data
Get environmental data
Combine it all together
Introduction (13:30 - 14:00)
Exercise 1 (14:00 - 14:45)
Break (14:45 - 15:00)
Exercise 2 (15:00 - 15:30)
Exercise 3 (15:30 - 15:45)
Coffee Break (15:45 - 16:00)
Exercise 4 (16:00 - 16:15)
Close-off (16:15 - 16:30)
Use your own data!
Coordinate Reference Systems (CRS) provide a standardized way of describing locations.
https://www.nceas.ucsb.edu
# API libraries
library(lwdataexplorer)
library(worrms)
library(mregions2)
## Helper libraries
library(mapview)
library(sf)
#---------------------------------------------------------------------------------------------
# Exercise 1.1.:
# - Get fish data from the European Tracking Network of the year 2015
# - Remove white spaces in the species names
#
# Hint: use `?getEtnData()` to find more information
#---------------------------------------------------------------------------------------------
# Query the European Tracking Network data with the "Time bins" action and a
# bin size of "1 day", for all networks and projects, between the two dates.
etn <- getEtnData(
  startdate = "2015-01-01",
  stopdate = "2016-01-01",
  action = "Time bins",
  by = "1 day",
  networks = "All",
  projects = "All"
)

# Remove leading and trailing white space from the species names, as the
# exercise asks, so the names match cleanly in the taxon match and join below.
etn$scientific_name <- trimws(etn$scientific_name)

# Inspect
View(etn)
#---------------------------------------------------------------------------------------------
# Exercise 1.2.:
# - Extract a list of species of the ETN data and
# - Find more taxonomic information using the WoRMS Taxon Match from the worrms R package.
# - Left join the taxonomic info to the ETN dataset on the scientific name
#
# Hint: use `dplyr::distinct()` or `base::unique()` to get unique values
# Hint: all worrms functions start with `wm_*`
# Hint: use `do.call(rbind, my_list)` to turn a list of data frames into a single data frame
#---------------------------------------------------------------------------------------------
# Unique list of species present in the ETN data
species_list <- unique(etn$scientific_name)

# Taxon match against WoRMS. The result is a list, which is row-bound into a
# single data frame.
species_matched <- wm_records_taxamatch(species_list)
species_matched <- do.call(rbind, species_matched)

# Left join: `all.x = TRUE` keeps every ETN row, including those whose species
# had no WoRMS match. Without it merge() performs an inner join and silently
# drops the unmatched rows.
etn <- merge(
  etn,
  species_matched,
  by.x = "scientific_name",
  by.y = "scientificname",
  all.x = TRUE
)

# Inspect
View(etn)
#---------------------------------------------------------------------------------------------
# Exercise 1.3.:
# - Use the mregions2 package to find and get the record for the Belgian Part of the North Sea.
# - Do a geospatial intersection between the latitude and longitude fields in the ETN
# data and the North Sea from Marine Regions.
#
# Hint: you must transform the ETN data from a data.frame to a simple feature `sf` class.
# Use the function `st_as_sf()` with the `coords` and `crs` arguments
# Hint: all functions from the sf package start with `st_*()`
# For example: `st_intersection()`
#---------------------------------------------------------------------------------------------
# Search the Marine Regions gazetteer for names containing "Belgian" and
# browse the candidate records
mr_belgian <- mr_gaz_records_by_names("Belgian")
View(mr_belgian)

# Fetch the record with MRGID 3293 (the Belgian Part of the North Sea)
bpns <- mr_gaz_record(3293)

# Turn the ETN data frame into a simple feature object. The coordinate
# columns are kept (`remove = FALSE`) because they are reused later on.
etn <- st_as_sf(
  etn,
  coords = c("longitude", "latitude"),
  crs = 4326,
  remove = FALSE
)

# Look at the points and the region together before intersecting
mapview(list(etn, bpns))

# Keep only the ETN features that intersect the region
etn <- st_intersection(etn, bpns)

# Look at the result
mapview(list(etn, bpns))
#---------------------------------------------------------------------------------------------
# Bonus Exercise 1.4.:
# - Save the ETN data to disk
#
#---------------------------------------------------------------------------------------------
# Make sure the output directory exists: neither st_write() nor write.csv()
# creates missing directories.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)

# To turn into a data.frame again, drop the geometry column. The result is
# kept so the conversion is done only once.
etn_df <- st_drop_geometry(etn)

# Save as shapefile. `append = FALSE` replaces an existing layer, so the script
# can be re-run without st_write() failing because the file already exists.
st_write(etn, "./data/etn.shp", append = FALSE)

# Save as csv (attributes only, without the geometry)
write.csv(etn_df, "./data/etn.csv")
Use the pipe operator (%>%) to increase code readability and avoid nesting: mean(as.numeric(df$temp)) translates to df$temp %>% as.numeric() %>% mean()
# API library
library(eurobis)
# Helper library
library(dplyr)
#---------------------------------------------------------------------------------------------
# Exercise 2.1.:
# - Use the eurobis R package to get EurOBIS data for the same species, time and location.
#
# Hint: more info with ?eurobis_occurrences_basic()
#---------------------------------------------------------------------------------------------
# Same Marine Regions identifier as the record fetched in Exercise 1.3
mrgid <- 3293

# Distinct valid AphiaIDs found in the ETN data
list_aphia <- unique(etn$valid_AphiaID)

# Basic EurOBIS occurrences for the matched species, restricted to the region
# and to the same period as the ETN query
basic <- eurobis_occurrences_basic(
  aphiaid = species_matched$AphiaID,
  mrgid = mrgid,
  start_date = "2015-01-01",
  end_date = "2016-01-01"
)
#---------------------------------------------------------------------------------------------
# Exercise 2.2.:
# - Include more regions in your query!
# - Select your desired area with `eurobis_map_draw()` and pass it to the argument `geometry`
# - Find more standardized marine regions with the family of functions `eurobis_map_regions_*()`
# - Find data for the species of your interest by passing a scientific name.
# - Find data for the functional group of your interest.
#---------------------------------------------------------------------------------------------
# Select your own area of interest with eurobis_map_draw()
my_polygon <- eurobis_map_draw()

# Browse the standardized marine regions that can be used in a query
eurobis_map_regions_ecoregions()
eurobis_map_regions_eez()
eurobis_map_regions_iho()
eurobis_map_regions_eez_iho()
eurobis_map_regions_reportingareas()

# Example query by geometry, region and functional group.
# The result is stored under its own name so that it does not overwrite the
# `basic` dataset from Exercise 2.1, which Exercise 2.3 combines with the
# ETN data.
basic_custom <- eurobis_occurrences_basic(
  geometry = my_polygon,
  mrgid = 2350,
  functional_groups = c("birds", "mammals")
)
#---------------------------------------------------------------------------------------------
# Exercise 2.3.:
# - Wrangle the ETN dataset to fit the column names of eurobis.
# - Get only unique combinations of values
# - Bind the rows of both datasets
#
# Hint: the aphiaID from EurOBIS comes with a full URL. Use `gsub()` or `stringr::str_replace()`
# together with `as.numeric()` to get only the AphiaID
# Hint: we recommend to use `dplyr::transmute()`, `unique.data.frame()` and `dplyr::bind_rows()`
# Hint: EurOBIS follows the Darwin Core standard for naming the columns: https://dwc.tdwg.org/terms/
#---------------------------------------------------------------------------------------------
# Compare the column names of both datasets
colnames(etn)
colnames(basic)

# EurOBIS: drop the columns that are not needed, store the date as text and
# strip the URL prefix from the AphiaID so that only the integer id remains
basic <- basic %>%
  select(-gml_id, -id, -datasetid, -coordinateuncertaintyinmeters) %>%
  mutate(
    datecollected = as.character(datecollected),
    aphiaid = gsub(
      "http://marinespecies.org/aphia.php?p=taxdetails&id=",
      "",
      aphiaid,
      fixed = TRUE
    ),
    aphiaid = as.integer(aphiaid)
  )

# ETN: rename the columns to the names used by EurOBIS, sort the records and
# keep only the unique combinations of values
df <- etn %>%
  transmute(
    datecollected = time,
    decimallongitude = longitude,
    decimallatitude = latitude,
    scientificname = scientific_name,
    aphiaid = AphiaID,
    scientificnameaccepted = valid_name
  ) %>%
  arrange(scientificname, datecollected) %>%
  unique.data.frame()

# Stack the EurOBIS rows below the ETN rows
df <- bind_rows(df, basic)
#---------------------------------------------------------------------------------------------
# Bonus Exercise 2.4.:
# - Repeat the query adding all the information available in EurOBIS
# - Get all the distinct values of the parameters_* columns
# - Filter only records with count of individuals
#
# Hint: use `eurobis_occurrences_full_and_parameters()`
# Hint: more information here: https://www.emodnet-biology.eu/emodnet-data-format
#---------------------------------------------------------------------------------------------
# Same query as in Exercise 2.1, this time requesting the full occurrence
# records together with their parameters
full_emof <- eurobis_occurrences_full_and_parameters(
  aphiaid = species_matched$AphiaID,
  mrgid = mrgid,
  start_date = "2015-01-01",
  end_date = "2016-01-01"
)

# View all parameters available: one row per distinct combination of the
# parameter description columns
full_emof %>%
  st_drop_geometry() %>%
  select(
    all_of(c(
      "parameter",
      "parameter_bodcterm",
      "parameter_bodcterm_definition",
      "parameter_measurementtypeid"
    ))
  ) %>%
  distinct() %>%
  View()

# Query only Counts: keep the records whose measurement type is OCOUNT01
full_emof <- filter(
  full_emof,
  parameter_measurementtypeid == "http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/"
)
Attention: raster::extract() and sf::st_join() both do a spatial join. The former is used for raster data, the latter for vector data!
# API library
library(EMODnetWFS)
library(sdmpredictors)
# Helper library
library(dplyr)
library(mapview)
library(sf)
#---------------------------------------------------------------------------------------------
# Exercise 3.1.:
# - Get EMODnet Seabed Habitats using EMODnetWFS
# - Start client
# - Explore layers available
# - Select habitats directive layers about Sandbanks and Reefs
#
#---------------------------------------------------------------------------------------------
# List the EMODnet WFS services
emodnet_wfs()

# Open a client for the seabed habitats service and list what it offers
seabed_client <- emodnet_init_wfs_client("seabed_habitats_general_datasets_and_products")
seabed_info <- emodnet_get_wfs_info(seabed_client)
View(seabed_info)

# Download the reefs layer (art17_hab_1170) in EPSG:4326
seabed_layers <- emodnet_get_layers(
  wfs = seabed_client,
  layers = "art17_hab_1170",
  reduce_layers = TRUE,
  crs = 4326
)

# Cast to geometry collections and extract only the polygons from them
seabed_layers <- st_cast(seabed_layers, to = "GEOMETRYCOLLECTION")
seabed_layers <- st_collection_extract(seabed_layers, type = "POLYGON")

# Inspect the occurrences together with the habitat polygons
mapview(list(df, seabed_layers))
#---------------------------------------------------------------------------------------------
# Exercise 3.2.:
# - Get mean sea surface temperature from Bio-Oracle from present and future conditions using
# the sdmpredictors R package.
# - Get the same data for future conditions under RCP85, for both 2050 and 2100.
#
# Hint: List all layers with `list_layers("Bio-ORACLE")` and `list_layers_future("Bio-ORACLE")`. Look
# only at the latest version (2.2)
# Hint: layers are loaded by providing the layer code
#---------------------------------------------------------------------------------------------
# See all present layers available in Bio-ORACLE version 2.2
list_layers <- list_layers("Bio-ORACLE", version = 2.2)
View(list_layers)

# Repeat for future layers
list_layers_future <- list_layers_future("Bio-ORACLE", version = 2.2)
View(list_layers_future)

# Codes of the layers to load: mean sea surface temperature for present
# conditions and for the RCP85 scenario in 2050 and 2100
layer_codes <- c(
  "BO22_tempmean_ss",
  "BO22_RCP85_2050_tempmean_ss",
  "BO22_RCP85_2100_tempmean_ss"
)

# Load the layers as a raster stack, using ./data/ as the data directory
layers <- load_layers(layer_codes, rasterstack = TRUE, datadir = "./data/")

# Inspect the layers
mapview(layers)

# Inspect the occurrences on top of the present-day temperature layer
mapview(list(df, layers$BO22_tempmean_ss))

# Library for the next exercise. It was fused onto the end of the previous
# statement, which made the line a syntax error.
library(raster)
library(sf)
#---------------------------------------------------------------------------------------------
# Exercise 4.1.:
# - Join seabed data with the dataset
#
# Hint: use the sf package
#---------------------------------------------------------------------------------------------
# Spatial join of the combined occurrence dataset with the seabed habitat
# polygons. The combined dataset is `df`; `df1` is never defined in this script.
test <- st_join(df, seabed_layers)
#---------------------------------------------------------------------------------------------
# Exercise 4.2.:
# - Extract temperature data from Bio-Oracle layers
#
# Hint: use `raster::extract`. The geometries must be transformed into another spatial type. Try
# with `sf::as_Spatial()`
#---------------------------------------------------------------------------------------------
# Convert the occurrences once to the Spatial class passed to raster::extract().
# The combined dataset is `df`; `df1` is never defined in this script.
occurrence_points <- sf::as_Spatial(df)

# Extract the temperature at every occurrence from each layer. `extract` is
# namespaced so that the raster version is used regardless of which other
# packages are attached.
df$temperature <- raster::extract(
  layers$BO22_tempmean_ss,
  occurrence_points
)
df$temperature_2050 <- raster::extract(
  layers$BO22_RCP85_2050_tempmean_ss,
  occurrence_points
)
df$temperature_2100 <- raster::extract(
  layers$BO22_RCP85_2100_tempmean_ss,
  occurrence_points
)
#---------------------------------------------------------------------------------------------
# Bonus Exercise 4.3.:
# - This is the end of the workshop. Feel free to try things on this curated dataset!
#
#---------------------------------------------------------------------------------------------
And now have fun with the workshop!